/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.bn;

import static org.apache.lucene.analysis.util.StemmerUtil.endsWith;

/**
 * Stemmer for Bengali.
 *
 * <p>The algorithm is based on the report in: <i>Natural Language Processing in an Indian Language
 * (Bengali)-I: Verb Phrase Analysis</i> P Sengupta and B B Chaudhuri
 *
 * <p>Few Stemmer criteria are taken from:
 * <i>http://members.unine.ch/jacques.savoy/clef/BengaliStemmerLight.java.txt</i>
 */
class BengaliStemmer {
  int stem(char[] buffer, int len) {

    // 8
    if (len > 9
        && (endsWith(buffer, len, "িয়াছিলাম")
            || endsWith(buffer, len, "িতেছিলাম")
            || endsWith(buffer, len, "িতেছিলেন")
            || endsWith(buffer, len, "ইতেছিলেন")
            || endsWith(buffer, len, "িয়াছিলেন")
            || endsWith(buffer, len, "ইয়াছিলেন"))) return len - 8;

    // 7
    if ((len > 8)
        && (endsWith(buffer, len, "িতেছিলি")
            || endsWith(buffer, len, "িতেছিলে")
            || endsWith(buffer, len, "িয়াছিলা")
            || endsWith(buffer, len, "িয়াছিলে")
            || endsWith(buffer, len, "িতেছিলা")
            || endsWith(buffer, len, "িয়াছিলি")
            || endsWith(buffer, len, "য়েদেরকে"))) return len - 7;

    // 6
    if ((len > 7)
        && (endsWith(buffer, len, "িতেছিস")
            || endsWith(buffer, len, "িতেছেন")
            || endsWith(buffer, len, "িয়াছিস")
            || endsWith(buffer, len, "িয়াছেন")
            || endsWith(buffer, len, "েছিলাম")
            || endsWith(buffer, len, "েছিলেন")
            || endsWith(buffer, len, "েদেরকে"))) return len - 6;

    // 5
    if ((len > 6)
        && (endsWith(buffer, len, "িতেছি")
            || endsWith(buffer, len, "িতেছা")
            || endsWith(buffer, len, "িতেছে")
            || endsWith(buffer, len, "ছিলাম")
            || endsWith(buffer, len, "ছিলেন")
            || endsWith(buffer, len, "িয়াছি")
            || endsWith(buffer, len, "িয়াছা")
            || endsWith(buffer, len, "িয়াছে")
            || endsWith(buffer, len, "েছিলে")
            || endsWith(buffer, len, "েছিলা")
            || endsWith(buffer, len, "য়েদের")
            || endsWith(buffer, len, "দেরকে"))) return len - 5;

    // 4
    if ((len > 5)
        && (endsWith(buffer, len, "িলাম")
            || endsWith(buffer, len, "িলেন")
            || endsWith(buffer, len, "িতাম")
            || endsWith(buffer, len, "িতেন")
            || endsWith(buffer, len, "িবেন")
            || endsWith(buffer, len, "ছিলি")
            || endsWith(buffer, len, "ছিলে")
            || endsWith(buffer, len, "ছিলা")
            || endsWith(buffer, len, "তেছে")
            || endsWith(buffer, len, "িতেছ")
            || endsWith(buffer, len, "খানা")
            || endsWith(buffer, len, "খানি")
            || endsWith(buffer, len, "গুলো")
            || endsWith(buffer, len, "গুলি")
            || endsWith(buffer, len, "য়েরা")
            || endsWith(buffer, len, "েদের"))) return len - 4;

    // 3
    if ((len > 4)
        && (endsWith(buffer, len, "লাম")
            || endsWith(buffer, len, "িলি")
            || endsWith(buffer, len, "ইলি")
            || endsWith(buffer, len, "িলে")
            || endsWith(buffer, len, "ইলে")
            || endsWith(buffer, len, "লেন")
            || endsWith(buffer, len, "িলা")
            || endsWith(buffer, len, "ইলা")
            || endsWith(buffer, len, "তাম")
            || endsWith(buffer, len, "িতি")
            || endsWith(buffer, len, "ইতি")
            || endsWith(buffer, len, "িতে")
            || endsWith(buffer, len, "ইতে")
            || endsWith(buffer, len, "তেন")
            || endsWith(buffer, len, "িতা")
            || endsWith(buffer, len, "িবা")
            || endsWith(buffer, len, "ইবা")
            || endsWith(buffer, len, "িবি")
            || endsWith(buffer, len, "ইবি")
            || endsWith(buffer, len, "বেন")
            || endsWith(buffer, len, "িবে")
            || endsWith(buffer, len, "ইবে")
            || endsWith(buffer, len, "ছেন")
            || endsWith(buffer, len, "য়োন")
            || endsWith(buffer, len, "য়ের")
            || endsWith(buffer, len, "েরা")
            || endsWith(buffer, len, "দের"))) return len - 3;

    // 2
    if ((len > 3)
        && (endsWith(buffer, len, "িস")
            || endsWith(buffer, len, "েন")
            || endsWith(buffer, len, "লি")
            || endsWith(buffer, len, "লে")
            || endsWith(buffer, len, "লা")
            || endsWith(buffer, len, "তি")
            || endsWith(buffer, len, "তে")
            || endsWith(buffer, len, "তা")
            || endsWith(buffer, len, "বি")
            || endsWith(buffer, len, "বে")
            || endsWith(buffer, len, "বা")
            || endsWith(buffer, len, "ছি")
            || endsWith(buffer, len, "ছা")
            || endsWith(buffer, len, "ছে")
            || endsWith(buffer, len, "ুন")
            || endsWith(buffer, len, "ুক")
            || endsWith(buffer, len, "টা")
            || endsWith(buffer, len, "টি")
            || endsWith(buffer, len, "নি")
            || endsWith(buffer, len, "ের")
            || endsWith(buffer, len, "তে")
            || endsWith(buffer, len, "রা")
            || endsWith(buffer, len, "কে"))) return len - 2;

    // 1
    if ((len > 2)
        && (endsWith(buffer, len, "ি")
            || endsWith(buffer, len, "ী")
            || endsWith(buffer, len, "া")
            || endsWith(buffer, len, "ো")
            || endsWith(buffer, len, "ে")
            || endsWith(buffer, len, "ব")
            || endsWith(buffer, len, "ত"))) return len - 1;

    return len;
  }
}
